Baseline prediction for homework type

The baseline prediction method we use for predicting which homework the notebook came from uses the popular plagiarism detector JPlag.

We feed each noteboook through our pipeline to eliminate variable names, string declarations, comments, and import names


In [1]:
# First step is to load a balanced dataset of homeworks
import sys
home_directory = '/dfs/scratch2/fcipollone'
sys.path.append(home_directory)

import numpy as np
from nbminer.notebook_miner import NotebookMiner

hw_filenames = np.load('../homework_names_jplag_combined_per_student.npy')
min_val = min([len(temp) for temp in hw_filenames])
print(min_val)
hw_notebooks = [[NotebookMiner(filename) for filename in temp[:min_val]] for temp in hw_filenames]


59

In [2]:
# Now we do the transformation, storing the results into the variable hw_code
from nbminer.pipeline.pipeline import Pipeline
from nbminer.features.features import Features
from nbminer.preprocess.get_ast_features import GetASTFeatures
from nbminer.preprocess.get_imports import GetImports
import tqdm

hw_code = []
for corp in tqdm.tqdm(hw_notebooks):
    temp = []
    for nb in corp:
        a = Features([nb])
        gastf = GetASTFeatures()
        gi = GetImports()
        pipe = Pipeline([gastf, gi])
        a = pipe.transform(a)
        code = a.get_notebook(0).get_all_asts()
        lines = code.split('\n')
        lines = [line for line in lines if line != '']
        temp.append('\n\n'.join(lines))
    hw_code.append(temp)


  0%|          | 0/6 [00:00<?, ?it/s]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4af98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf4048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da66278>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da661d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafe748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafe828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dabf5f8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabf630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da74128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da740b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da60518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da609b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadd5c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db07e48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daccfd0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab7e48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafe0f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db12898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da66c18>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafa828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db194a8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabfc88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab1e48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab1fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac41d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafec88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad4d68>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad82b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db00c88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da9f588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadddd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da9f6d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafc8d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0d0f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daeb160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dadae10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf7da0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0bd68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf35c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf3128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac4208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf3390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad0d68>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dadd7f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafbe10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafb198>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad46a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0d1d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db045f8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db04d68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4ab00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1a080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db04438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf7e48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafc160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafc748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db11400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db117b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da28630>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa80f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf5be0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf56a0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad6438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad6ac8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0b940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1d278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db14c50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac82e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad4748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad47b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db064e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db06630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafee10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db15390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da8d160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daeb0b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf5518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacb518>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac82e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacb4e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dada898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dada240>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db16128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db164a8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db16518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab1080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daad5f8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daad208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daadb00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daad630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa4e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac22e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac5cc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db07c18>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db035c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db03668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db03160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db03b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac4ba8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafe240>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafe2e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafe278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daffc88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacb048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac0978>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac00b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac2518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0d2b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafee10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacbc88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daeb160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daeb518>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da95240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabfd68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da95550>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da95860>
1
 17%|█▋        | 1/6 [00:07<00:37,  7.54s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafea58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafecc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4afd0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da96b70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4a940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabff28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0fe10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacb6a0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab4ef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db15c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa0048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad8390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0c0f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac85f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa9f60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac89b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab44a8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab4588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db11dd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db11f28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafdc18>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafdc88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab1940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab1d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad83c8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac29e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafd240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daee978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa0c50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf5048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da7c2e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da7c470>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db05358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da7c240>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadeb70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacdac8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad4cf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac8128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa0278>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa0400>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadd358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da9f7f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafe0f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafe160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0d6d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab2d68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa9860>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabe8d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa02e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa0710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db01320>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db01c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafebe0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa4898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafaf98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db01e10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac3f28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db19828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf7b38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf7470>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da7f2e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da7f320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab24e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabf8d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db19cc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db19160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daddbe0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab4898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da7c7f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db03dd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dabf7b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabf5c0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db19160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db19048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857d9b3ba8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857d9b1d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4b128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab1a20>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab19e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db02f98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0b358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db04be0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db04240>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da9fa90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabe908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dabeb70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dabe9e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadc358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dadcd30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa8b70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab2668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1a278>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1a1d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa0f28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da77358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db11ac8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db11dd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac5b70>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa9b70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da8abe0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da8acf8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dabe710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf94e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da8aa90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da8ada0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0ff98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0f710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf7438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf7eb8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da8ada0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad84e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad88d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da83ac8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4b128>
1
 33%|███▎      | 2/6 [00:15<00:30,  7.62s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db14a20>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da60fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db02e48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da60f28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dabf278>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db02f60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf16d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf1da0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafe710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db015f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dabea58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da94b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf1d30>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4afd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4b160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dacd8d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da85b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1d160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db147f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da89668>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da5dda0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da2f940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da2f518>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da720f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da72198>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da69b00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da69c18>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da69128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da63160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dace5c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db06ef0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da89c18>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafeeb8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa9da0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa9dd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad0278>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad0390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa79b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa7a20>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dacdcf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacd908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da36358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0c0f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf3080>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da36d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1d4a8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1dd68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db01160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1df98>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dadce10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da9be80>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da63da0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac13c8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad75c0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dade780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dade978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da699b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da692b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad48d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad4048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad0f28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafe2b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa7860>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db053c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1d278>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1a668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad4cf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1d198>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf3a20>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db115f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf5be0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacc2b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db02358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db02748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db11898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da7e908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db18cc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db18cf8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad4470>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db01e80>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dacd710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacd4e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac7ef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db14898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db04240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1abe0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1a240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf5128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1a470>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacfc18>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf4d30>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf3c50>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da75128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacc7f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf16a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf1860>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad1c88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad1cc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db064a8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db022b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafa2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db04940>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db14400>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab2be0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4b160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafac88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafa898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dabb7b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacf208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0fc88>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0fb70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf7470>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db052b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db11f60>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daaf9b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf9748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf91d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da96630>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da96eb8>
1
 50%|█████     | 3/6 [00:19<00:19,  6.56s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafa0b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacd0b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafabe0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafa5f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf62b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf6588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0f7b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0f6a0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab2710>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db18ba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0f668>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1d9e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db5de48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac86d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0c2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab2898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac80f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da7e5c0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab1978>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab1668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0fa90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1ad68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dacd9e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacd588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf1438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab22b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daafc50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daafb70>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dacf748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db05da0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db06f98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacfba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db18518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db02d68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da7e668>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da7e390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa9390>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1aba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0c048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac7198>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0fef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db06438>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daafb38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db18748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daaf2e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacf358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daac390>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daac8d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac7e80>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf5c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa0be0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad1d30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad1048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4af98>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da5f4a8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1d0f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dae56d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dae5630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dae58d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dae5978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0c4e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0e780>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf61d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db118d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da5fa58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da5f978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf4a90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db018d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0dcf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dae5710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dabbc50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da82f60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf1588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf16a0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf96d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf97f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf9160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf90b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da5f668>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db07ef0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db5de48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0dba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4ada0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4ae10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf1ef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf1e48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf9d68>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0dac8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0e668>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db00320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf11d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafab00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da88208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da88470>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf7a90>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db04470>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafa9b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafa8d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db116a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db119e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db00828>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db00b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db156a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db15160>
1
 67%|██████▋   | 4/6 [00:22<00:11,  5.67s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf3cf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf36d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da82588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0ef60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf7208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf36d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf9c50>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db04b00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf3160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf3588>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db04390>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1d080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0cac8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0cd30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf90f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0e8d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db15cc0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf7f98>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daced30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dae5ef0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dae5978>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf2630>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafaa20>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da5fbe0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db014e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da87dd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0c828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db072e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db07a20>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dacd518>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad9f60>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db07a58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4af28>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf69b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db15c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da87dd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db15160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da2ba58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da2bb00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafaac8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafac50>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db04dd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db04d68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0cb38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad2b00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dae56d8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db00c50>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf5d68>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad4fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad2ef0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db056a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dafea58>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db18390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1b160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da38550>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1deb8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db11fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad81d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa4390>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4ae10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db17160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0da58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dada780>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad8240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1d6d8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf6b00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da46780>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa4278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dae54e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db04860>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad14e0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daaa3c8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db17e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db14d68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0fcf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db17278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da62048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da62470>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db11240>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db11898>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da626a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da62438>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da62f98>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da47780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da47cf8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf66a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf9278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa40f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa4940>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0c7f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db08e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daaaeb8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0ddd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0db00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daaa0f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daaad30>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da38470>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0c7f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf5898>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daeec50>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da38e80>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf3a20>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadd860>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dadda58>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac26a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf9320>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4af28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4ae10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab9ba8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab9400>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf70b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf7b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da62f98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0c748>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf9ba8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da46160>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db14828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafe5c0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa4208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf9438>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf9e48>
1
 83%|████████▎ | 5/6 [00:27<00:05,  5.44s/it]
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db14fd0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db14828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf6470>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf60b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dada780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dada4e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadc358>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dadc710>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf74a8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf72e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4ada0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da38ac8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db17908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf90f0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dafedd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf5908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf92e8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db144a8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad8d68>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf3358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daeed30>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db0ecc0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da86f28>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da86d68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac8748>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da74908>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db05f98>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da98400>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db5de48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1b208>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da986a0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da98400>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da79b00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da92358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daf3cf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daf37b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da8cb00>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da98ba8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db11e10>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db18278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da627b8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac4668>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da47b38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac44a8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db04dd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da6eb00>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db044e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da628d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da622e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da38c18>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da8cac8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db01198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daaa0b8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db0f668>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db1d5c0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac4a58>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad1fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadd198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db5de48>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac6048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab59e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa27f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa7e80>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac48d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac4ef0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db4b128>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db4b2b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad4080>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857da8b4e0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daee160>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db19198>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab9048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daee278>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db5de48>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa7080>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad0940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad09e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadd048>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dadd2e8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa7208>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac42b0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daca7f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa7048>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa7908>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857daa7fd0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dacc828>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab9358>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daad7f0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad1128>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dadd780>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad6438>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daa4198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db195f8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad9cf8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad9550>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daaab38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab9780>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad6940>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad6d68>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dab98d0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab9e10>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db19c18>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dadd630>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857daad588>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dab9c88>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac4be0>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dacda58>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1ddd8>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db19828>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857db1de80>
1
100%|██████████| 6/6 [00:31<00:00,  5.21s/it]
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dad01d0>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dac8198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac8b38>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857da83b38>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857dac4dd8>
1
<nbminer.preprocess.get_ast_features.GetASTFeatures object at 0x7f857dad8198>
1
<nbminer.preprocess.get_imports.GetImports object at 0x7f857db056d8>
1


In [3]:
# Print an example to see what the result of the transformation looks like.
print(hw_code[0][0])


import import as import

import import as import

pd.options.mode.chained_assignment = None

var = pd.Series([632, 1638, 569, 115])

var

var.values

var.index

var = pd.Series([632, 1638, 569, 115], index=['string', 'string', 'string',

    'string'])

var

var['string']

var[[var.endswith('string') for var in var.index]]

[var.endswith('string') for var in var.index]

var[0]

var.name = 'string'

var.index.name = 'string'

var

np.log10(var)

var[var > 1000]

var = {'string': 632, 'string': 1638, 'string': 569, 'string': 115}

pd.Series(var)

var = pd.Series(var, index=['string', 'string', 'string', 'string'])

var

var.isnull()

var + var

var = pd.DataFrame({'string': [632, 1638, 569, 115, 433, 1130, 754, 555],

    'string': [1, 1, 1, 1, 2, 2, 2, 2], 'string': ['string', 'string',

    'string', 'string', 'string', 'string', 'string', 'string']})

var

var[['string', 'string', 'string']]

var.columns

var.dtypes

var['string']

var.patient

type(var.phylum)

type(var[['string']])

var.loc[3]

var.head()

var.tail(3)

var.shape

var = pd.DataFrame([{'string': 1, 'string': 'string', 'string': 632}, {

    'string': 1, 'string': 'string', 'string': 1638}, {'string': 1,

    'string': 'string', 'string': 569}, {'string': 1, 'string': 'string',

    'string': 115}, {'string': 2, 'string': 'string', 'string': 433}, {

    'string': 2, 'string': 'string', 'string': 1130}, {'string': 2,

    'string': 'string', 'string': 754}, {'string': 2, 'string': 'string',

    'string': 555}])

var

var = var.value

var

var[5] = 0

var

var = var.value.copy()

var[5] = 1000

var

var.value[[3, 4, 6]] = [14, 21, 5]

var

var['string'] = 2013

var

var.treatment = 1

var

var.treatment

var[var.value > 1000]

var[var.phylum.str.endswith('string') & (var.value > 1000)]

var = pd.Series([0] * 4 + [1] * 2)

var

var['string'] = var

var

var = ['string', 'string', 'string', 'string']

var['string'] = var

var['string'] = ['string'] * len(var)

var

var = var.drop('string', axis=1)

var

var.values

var = pd.DataFrame({'string': [1, 2, 3], 'string': [0.4, -1.0, 4.5]})

var.values

var.index

var.index[0] = 15

var.index = var.index

var

var().system('string')

var = pd.read_csv('string')

var

pd.read_csv('string', header=None).head()

var = pd.read_table('string', sep='string')

var = pd.read_csv('string', index_col=['string', 'string'])

var.head()

pd.read_csv('string', skiprows=[3, 4, 6]).head()

pd.read_csv('string', nrows=4)

pd.read_csv('string', chunksize=14)

var = pd.read_csv('string', chunksize=14)

var = pd.Series({var.Taxon[0]: var.Tissue.mean() for var in var})

var

var().system('string')

pd.read_csv('string').head(20)

pd.isnull(pd.read_csv('string')).head(20)

pd.read_csv('string', na_values=['string', -99999]).head(20)

var = pd.read_excel('string', sheetname='string', header=None)

var.head()

var = pd.read_csv('string', index_col='string')

var.head()

var = var.player + var.year.astype(str)

var = var.copy()

var.index = var

var.head()

var.index.is_unique

pd.Series(var.index).value_counts()

var.loc['string']

var = var.player + var.team + var.year.astype(str)

var = var.copy()

var.index = var

var.head()

var.index.is_unique

var.reindex(var.index[::-1]).head()

var = range(var.index.values.min(), var.index.values.max())

var.reindex(var).head()

var.reindex(var, method='string', columns=['string', 'string']).head()

var.reindex(var, fill_value='string', columns=['string']).head()

var.shape

var.drop([89525, 89526])

var.drop(['string', 'string'], axis=1)

var = var.h

var

var[:3]

var[['string', 'string']]

var['string':'string']

var['string':'string'] = 5

var

var[['string', 'string']]

var[var.ab > 500]

var.query('string')

var = 450

var.query('string')

var.loc['string', ['string', 'string', 'string', 'string']]

var.loc[:'string', ('string')]

var.iloc[:5, 5:8]

var = ['string', 'string']

var = var[var.team.isin(var)]

var

len(var.player.unique())

var().run_line_magic('string', 'string')

import import as import

import import as import

import import as import

sns.set_context('string')

sns.barplot(y=var.team.unique(), x=var.team.value_counts())

var = var.loc[var.year == 2006, 'string']

var.index = var.player[var.year == 2006]

var = var.loc[var.year == 2007, 'string']

var.index = var.player[var.year == 2007]

var

var = var + var

var

var[var.notnull()]

var.add(var, fill_value=0)

var.hr - var.hr.max()

var.loc[89521, 'string']

var = var[['string', 'string', 'string', 'string']]

var = var - var.loc[89521]

var[:10]

var.apply(np.median)

def range_calc(x):

    return var.max() - var.min()

var = lambda x: var.max() - var.min()

var.apply(var)

def slugging(x):

    var = var['string'] - var['string'] - var['string'] - var['string'

        ] + 2 * var['string'] + 3 * var['string'] + 4 * var['string']

    var = var['string'] + 1e-06

    return var / var

var.apply(var, axis=1).round(3)

var.sort_index().head()

var.sort_index(ascending=False).head()

var.sort_index(axis=1).head()

var.hr.sort_values()

var[['string', 'string', 'string']].sort_values(ascending=[False, True], by

    =['string', 'string']).head(10)

var.hr.rank()

pd.Series([100, 100]).rank()

var.hr.rank(method='string')

var.rank(ascending=False).head()

var[['string', 'string', 'string']].rank(ascending=False).head()

def on_base(x):

    """string"""

    var = var['string'] + var['string'] + var['string']

    var = var['string'] + var['string'] + var['string'] + var['string'] + 1e-06

    return var / var

var.apply(var, axis=1).round(3).sort_values(ascending=False)

var = var.set_index(['string', 'string', 'string'])

var.head(10)

var.index[:10]

var.index.is_unique

var.loc[2007, 'string', 'string']

var = pd.read_csv('string', index_col=['string', 'string'])

var.head(10)

var.loc['string']

var = pd.DataFrame(np.arange(12).reshape((4, 3)), index=[['string',

    'string', 'string', 'string'], [1, 2, 1, 2]], columns=[['string',

    'string', 'string'], ['string', 'string', 'string']])

var

var.index.names = ['string', 'string']

var.columns.names = ['string', 'string']

var

var.loc['string', 'string']

var.loc[('string', 2), 'string']

var.swaplevel('string', 'string').head()

var.sortlevel('string', ascending=False).head()

var = pd.Series([np.nan, -3, None, 'string'])

var

var.isnull()

var

var.dropna()

var.isnull()

var[var.notnull()]

var.dropna()

var.dropna(how='string')

var.loc[7, 'string'] = np.nan

var

var.dropna(thresh=5)

var.dropna(axis=1)

var.fillna(0)

var.fillna({'string': 2013, 'string': 2})

var.year.fillna(2013, inplace=True)

var

var.fillna(method='string')

var.sum()

var.mean()

var

var.mean()

var.mean(skipna=False)

var = var[['string', 'string', 'string']].sum(axis=1)

var.sort_values(ascending=False)

var.describe()

var.player.describe()

var.hr.cov(var.X2b)

var.hr.corr(var.X2b)

var.ab.corr(var.h)

var.corr()

var.head()

var.sum(level='string')

var.to_csv('string')

var.to_pickle('string')

pd.read_pickle('string')

import import as import

var = [pd.DataFrame(columns=['string', 'string', 'string']) for var in range(3)

    ]

var = 'string'

var = var[0]

for var in os.listdir(var):

    var = pd.read_csv(var + var, parse_dates=['string'])

    var = var.Date[0]

    var = var[(var.Description == 'string') | (var.Description == 'string')]

    var.index = var.Date

    var = int(var.loc[var, 'string'])

    var = var[var.Description == 'string']

    var.index = var.Date

    var = int(var.loc[var, 'string'])

    var.loc[var] = [var, var, 'string']

var = 'string'

var = var[1]

for var in os.listdir(var):

    var = pd.read_csv(var + var, parse_dates=['string']).fillna(0)

    var = var.Date[0]

    var.index = var.Variable

    var = int(var.loc['string', 'string'])

    var = int(var.loc['string', 'string'])

    var.loc[var] = [var, var, 'string']

var = 'string'

var = var[2]

for var in os.listdir(var):

    var = pd.read_csv(var + var, parse_dates=['string']).fillna(0)

    var = var.date[0]

    var.index = var.variable

    var = int(var.loc['string', 'string'])

    var = int(var.loc['string', 'string'])

    var.loc[var] = [var, var, 'string']

var = []

for var in var:

    var.index.name = 'string'

    var.append(var.set_index(['string', var.index]))

pd.concat(var, axis=0)

Running Jplag

To run jplag, we need to write all of our files to a directory, and then setup the command with the .jar file that needs to be run on the command line


In [4]:
import os
for i in range(len(hw_code)):
    if i < 2:
        continue
    base_name = 'plagiarism/homework_code_cleaned_hw2plus/hw' + str(i) + '_'
    for j, code_body in enumerate(hw_code[i]):
        fname = base_name + 'student_' + str(j) + ".py"
        f = open(fname,'w')
        f.write(code_body)
        f.close

In [10]:
import os
jar_file = 'plagiarism/jplag-2.11.9-SNAPSHOT-jar-with-dependencies.jar'
lang = 'python3'
results = 'plagiarism/results_cleaned_hw2plus'
students = 'plagiarism/homework_code_cleaned_hw2plus'
command = "java -jar " + jar_file + " -l " + lang + " -r " + results + " -s " + students + " -m 20"

In [11]:
print("nohup",command,"> plagiarism/experiment_cleaned_hw2plus.out &")


nohup java -jar plagiarism/jplag-2.11.9-SNAPSHOT-jar-with-dependencies.jar -l python3 -r plagiarism/results_cleaned_hw2plus -s plagiarism/homework_code_cleaned_hw2plus -m 20 > plagiarism/experiment_cleaned_hw2plus.out &

After we run the JPlag command

While JPlag produces a nice report that is human readable, we want the pairwise similarities, which are printed out by JPlag as it runs. By parsing the output file we can get these similarities that we will use for prediction


In [12]:
output = open('plagiarism/experiment_cleaned_hw2plus.out','r')
lines = [line for line in output if line[:9] == 'Comparing']

In [13]:
len(lines)


Out[13]:
27261

In [14]:
# Create the dictionary of pairwise sims
my_dict = {}
for line in lines:
    hw1 = line.split()[1].split('-')[0].split('.')[0]
    hw2 = line.split()[1].split('-')[1].split('.')[0]
    val = line.split()[2]
    if hw1 not in my_dict:
        my_dict[hw1] = {}
    if hw2 not in my_dict:
        my_dict[hw2] = {}
    my_dict[hw1][hw2] = val
    my_dict[hw2][hw1] = val

Inter and Intra Similarities

The first measure that we can use to determine if something reasonable is happening is to look at, for each homework, the average similarity of two notebooks both pulled from that homework, and the average similarity of a notebook pulled from that homework and any notebook in the corpus not pulled from that homework. These are printed below


In [16]:
import numpy as np
def get_avg_inter_intra_sims(sim_dict, hw):
    cur_hw = 'hw' + str(hw)
    in_vals = []
    out_vals = []
    for key in sim_dict.keys():
        if key[:3] != cur_hw:
            continue
        for key2 in sim_dict[key].keys():
            if key2[:3] != cur_hw:
                out_vals.append(float(sim_dict[key][key2]))
            else:
                in_vals.append(float(sim_dict[key][key2]))
    return in_vals, out_vals


for i in range(2,6):
    intra_sims, inter_sims = get_avg_inter_intra_sims(my_dict, i)
    print('Mean intra similarity for hw',i,'is',np.mean(intra_sims),'with std',np.std(intra_sims))
    print('Mean inter similarity for hw',i,'is',np.mean(inter_sims),'with std',np.std(inter_sims))
    print('----')


Mean intra similarity for hw 2 is 4.827964967513611 with std 3.0326538887152887
Mean inter similarity for hw 2 is 4.945174880652429 with std 2.8757815993216207
----
Mean intra similarity for hw 3 is 6.608522854120397 with std 3.672315918181055
Mean inter similarity for hw 3 is 5.419221653704601 with std 3.0264227038294482
----
Mean intra similarity for hw 4 is 7.35182069006429 with std 3.770690031727822
Mean inter similarity for hw 4 is 5.7433097468765135 with std 3.0676886091054296
----
Mean intra similarity for hw 5 is 7.5921127759225655 with std 3.907326584269893
Mean inter similarity for hw 5 is 5.573344589860893 with std 3.0099291090951388
----

In [17]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = 5, 10
def get_all_sims(sim_dict, hw):
    cur_hw = 'hw' + str(hw)
    sims = []
    for key in sim_dict.keys():
        for key2 in sim_dict[key].keys():
            if key[:3] != cur_hw and key2[:3] != cur_hw:
                continue
            sims.append(float(sim_dict[key][key2]))
    return sims
fig, axes = plt.subplots(6)
for i in range(2,6):
    axes[i].hist(get_all_sims(my_dict,i), bins=50)


Actual Prediction

While the above results are helpful, it is better to use a classifier that uses more information. The setup is as follows:

  1. Split the data into train and test
  2. For each notebook, generate a feature vector that is calculated as the similarity between the notebook and each notebook of the train set
  3. Build a random forest classifier that uses this feature representation, and measure the performance

In [18]:
from sklearn.model_selection import train_test_split
features = [key for key in my_dict]
feature_map = {}
test_features = set()

indices = [i for i in range(len(features))]
#import pdb; pdb.set_trace()
train, test = train_test_split(indices, test_size=.2)
for i in test:
    test_features.add(features[i])
train_features = []
for i in train:
    train_features.append(features[i])
for i, el in enumerate(train_features):
    feature_map[el] = i
    
X = np.zeros((len(train),len(train)))
y = []
X_test = np.zeros((len(test), len(train)))
y_test = []
for i, el in enumerate(train_features):
    for key in my_dict[el]:
        if key not in feature_map:
            continue
        loc = feature_map[key]
        X[i, loc] = my_dict[el][key]
    y.append(int(el[2]))

for i, el in enumerate(test_features):
    for key in my_dict[el]:
        if key not in feature_map:
            continue
        loc = feature_map[key]
        X_test[i, loc] = my_dict[el][key]
    y_test.append(int(el[2]))

In [19]:
import sklearn
from sklearn.ensemble import RandomForestClassifier

clf = sklearn.ensemble.RandomForestClassifier(n_estimators=400, max_depth=4)
clf.fit(X, y)


Out[19]:
RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=4, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=400, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)

In [20]:
clf.predict(X_test)


Out[20]:
array([3, 5, 4, 5, 3, 2, 3, 4, 4, 5, 2, 2, 5, 2, 5, 3, 2, 2, 2, 4, 5, 4,
       4, 4, 4, 2, 2, 3, 4, 2, 4, 5, 3, 3, 2, 3, 4, 2, 3, 4, 2, 4, 2, 5,
       2, 5, 3])

Results

Below are the results of the prediction. We can see a good deal of predictive power, though there is room for improvement


In [21]:
import numpy as np
np.sum(clf.predict(X_test)==y_test)/len(y_test)


Out[21]:
0.5957446808510638

In [22]:
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(clf.predict(X_test),y_test)
import matplotlib.pyplot as plt
%matplotlib inline
plt.imshow(cm, cmap=plt.cm.Blues)
plt.show()



In [23]:
clfi = clf.feature_importances_
sa = []
for i in range(len(clfi)):
    sa.append((clfi[i], train_features[i]))
sra = [el for el in reversed(sorted(sa))]

In [24]:
for i in range(100):
    print(sra[i])


(0.031378740272001875, 'hw5_student_31')
(0.029940959991165722, 'hw4_student_44')
(0.024654139203015468, 'hw5_student_52')
(0.024088611149040724, 'hw4_student_23')
(0.020767181441547287, 'hw5_student_22')
(0.01969705068282446, 'hw4_student_27')
(0.019605147940309132, 'hw3_student_2')
(0.019001398037568992, 'hw5_student_8')
(0.018819067868530645, 'hw5_student_27')
(0.01705566889635315, 'hw3_student_55')
(0.01675168600450855, 'hw5_student_47')
(0.016453862962746993, 'hw5_student_7')
(0.01633731032675901, 'hw5_student_42')
(0.015210407754783406, 'hw5_student_2')
(0.01401630698886825, 'hw5_student_36')
(0.013886569509452794, 'hw5_student_26')
(0.013557781010912642, 'hw3_student_1')
(0.012902406096220741, 'hw4_student_20')
(0.012137654019817822, 'hw5_student_12')
(0.012088556636135958, 'hw3_student_41')
(0.012025078923189275, 'hw3_student_7')
(0.011596569483279784, 'hw3_student_18')
(0.011540545283986342, 'hw5_student_45')
(0.010379057948967731, 'hw3_student_36')
(0.010305110608634361, 'hw5_student_44')
(0.009056399462138742, 'hw5_student_10')
(0.008870797210322294, 'hw3_student_20')
(0.00870253898724688, 'hw5_student_19')
(0.008275768304981694, 'hw4_student_17')
(0.007779825771604281, 'hw5_student_3')
(0.007691126520162556, 'hw5_student_48')
(0.007567184713682507, 'hw4_student_19')
(0.007481384080006379, 'hw4_student_46')
(0.007372305594029156, 'hw4_student_52')
(0.007119911917475515, 'hw4_student_12')
(0.00704193566067574, 'hw5_student_32')
(0.007026856666042813, 'hw3_student_12')
(0.007004922125621497, 'hw4_student_35')
(0.0069701834500636465, 'hw4_student_2')
(0.0068426505928337825, 'hw2_student_24')
(0.006818159062634757, 'hw5_student_15')
(0.006644313503037042, 'hw3_student_52')
(0.006492158356857059, 'hw3_student_48')
(0.006444402670196472, 'hw4_student_22')
(0.006429131978275753, 'hw5_student_24')
(0.006424918671576197, 'hw2_student_29')
(0.006364423578732523, 'hw5_student_46')
(0.006181221231770368, 'hw3_student_33')
(0.00606456552852785, 'hw3_student_28')
(0.006033772636096254, 'hw3_student_32')
(0.00587725489489773, 'hw3_student_43')
(0.005682725332593182, 'hw5_student_29')
(0.005539272189555089, 'hw2_student_31')
(0.005522672890865799, 'hw5_student_51')
(0.005316947581590998, 'hw4_student_48')
(0.0053067248348179765, 'hw4_student_24')
(0.005290957555399532, 'hw4_student_58')
(0.005260504826306096, 'hw3_student_13')
(0.005184342892639721, 'hw3_student_47')
(0.005143604536972876, 'hw4_student_18')
(0.0050473129395066365, 'hw4_student_11')
(0.004990926673520409, 'hw5_student_57')
(0.004870935130794322, 'hw4_student_47')
(0.0048273832428773, 'hw5_student_23')
(0.0048094402941950614, 'hw5_student_54')
(0.004779473076646864, 'hw3_student_29')
(0.004718552202113451, 'hw3_student_49')
(0.004698822548923999, 'hw4_student_41')
(0.004680523104748015, 'hw5_student_16')
(0.004647867627179419, 'hw3_student_35')
(0.0045348959058653885, 'hw3_student_5')
(0.004389089378567368, 'hw3_student_25')
(0.0043360974183634885, 'hw3_student_16')
(0.004334116158646346, 'hw5_student_34')
(0.004240825789197011, 'hw4_student_49')
(0.004224115683069109, 'hw3_student_3')
(0.004191207220754221, 'hw4_student_34')
(0.004056595620812017, 'hw5_student_28')
(0.003993566139956056, 'hw4_student_36')
(0.003971876206017693, 'hw3_student_40')
(0.003888949716087934, 'hw4_student_57')
(0.003872200697122904, 'hw3_student_14')
(0.0038639414114750403, 'hw4_student_30')
(0.0038110334814657598, 'hw5_student_41')
(0.0038074058064962737, 'hw4_student_9')
(0.0037941171688892997, 'hw5_student_14')
(0.0037631401524796694, 'hw3_student_42')
(0.003750399268871123, 'hw2_student_40')
(0.0037391073699279465, 'hw4_student_33')
(0.003711948519812981, 'hw3_student_46')
(0.003657176655889858, 'hw4_student_51')
(0.0036452400214495966, 'hw4_student_50')
(0.0035979864143054346, 'hw2_student_9')
(0.003472689127270445, 'hw4_student_26')
(0.0033959382080704596, 'hw4_student_16')
(0.0033843581309025423, 'hw2_student_28')
(0.0033766423810045586, 'hw4_student_29')
(0.0033600360967813493, 'hw5_student_20')
(0.0033060033109307708, 'hw3_student_19')
(0.003249315804679122, 'hw3_student_24')

In [ ]: